
// cell noise implementation

// Note that the old implementation for qfloor(): 
//     x < 0.f ? 4294967295 - uint(-x) : uint(x);
// was wrong for exact negative integers
//     x < 0.f ? 4294967295 - uint(-floor(x)-1) : uint(x);
// however gives the right result. Alternatively
//     x < 0.f ? 4294967295 - uint(ceil(-x)-1) : uint(x);
// also works. And even
//     x < 0.f ? 4294967295 - uint(-floor(x+1.0)) : uint(x);
// Yes - the double negations look crazy, but trust me on
// the data types... you can't add a negative signed integer
// to an unsigned one... but you can subtract a positive
// unsigned integer from it. One would naively think those
// are the same thing, but noooo... :)
// Floors x and returns the result as a uint. For negative x the value is
// 2^32 + floor(x), i.e. the 32-bit two's-complement bit pattern of floor(x).
uint qfloor1(in float x)
{
	if (x < 0.f)
	{
		// Count down from 0xFFFFFFFF (the pattern of -1) by the positive
		// distance between floor(x) and -1.
		return 4294967295 - uint(-floor(x)-1);
	}

	// Non-negative input: plain truncation.
	return uint(x);
}

// Component-wise version of qfloor1 for float2: each component is floored and
// returned as a uint, negative values as 2^32 + floor(x).
uint2 qfloor2(in float2 x)
{
	// Result for negative components.
	const uint2 wrapped = ((uint2)4294967295) - uint2(-floor(x)-1);
	// Result for non-negative components.
	const uint2 truncated = uint2(x);

	return x < ((float2)0.f) ? wrapped : truncated;
}

// Component-wise version of qfloor1 for float3: each component is floored and
// returned as a uint, negative values as 2^32 + floor(x).
uint3 qfloor3(in float3 x)
{
	// Result for negative components.
	const uint3 wrapped = ((uint3)4294967295) - uint3(-floor(x)-1);
	// Result for non-negative components.
	const uint3 truncated = uint3(x);

	return x < ((float3)0.f) ? wrapped : truncated;
}

// Component-wise version of qfloor1 for float4: each component is floored and
// returned as a uint, negative values as 2^32 + floor(x).
uint4 qfloor4(in float4 x)
{
	// Result for negative components.
	const uint4 wrapped = ((uint4)4294967295) - uint4(-floor(x)-1);
	// Result for non-negative components.
	const uint4 truncated = uint4(x);

	return x < ((float4)0.f) ? wrapped : truncated;
}

// Maps a 32-bit unsigned value to a float by dividing it by 4294967295
// (the largest uint), so 0 maps to 0.0.
float bits_to_01(in uint bits)
{
	const float max_bits = 4294967295.f;
	return bits / max_bits;
}

// Rotates the 32-bit value x left by k bits.
uint rotl32(in uint x, in int k)
{
	const uint shifted_up = x << k;            // bits that stay after the shift
	const uint wrapped    = x >> (32 - k);     // bits that fall off the top
	return shifted_up | wrapped;
}

// Mixes the three 32-bit state words a, b, c in place. Six rounds, each a
// subtract, an xor with a rotated word, and an add; the rotation amounts are
// 4, 6, 8, 16, 19, 4. Statement order is significant.
// (The "bj" prefix presumably refers to Bob Jenkins' lookup3 hash, whose mix
// step uses the same constants.)
void bjmix(inout uint a, inout uint b, inout uint c)
{
	// round 1
	a -= c;
	a ^= rotl32(c, 4);
	c += b;

	// round 2
	b -= a;
	b ^= rotl32(a, 6);
	a += c;

	// round 3
	c -= b;
	c ^= rotl32(b, 8);
	b += a;

	// round 4
	a -= c;
	a ^= rotl32(c, 16);
	c += b;

	// round 5
	b -= a;
	b ^= rotl32(a, 19);
	a += c;

	// round 6
	c -= b;
	c ^= rotl32(b, 4);
	b += a;
}

// Final scrambling of three 32-bit words into a single 32-bit hash.
// Seven xor / subtract-rotated steps cycling through c, a, b; the resulting
// c is returned. The parameters are by-value, so callers' values are untouched.
uint bjfinal(in uint a, in uint b, in uint c)
{
	c ^= b;
	c -= rotl32(b, 14);

	a ^= c;
	a -= rotl32(c, 11);

	b ^= a;
	b -= rotl32(a, 25);

	c ^= b;
	c -= rotl32(b, 16);

	a ^= c;
	a -= rotl32(c, 4);

	b ^= a;
	b -= rotl32(a, 14);

	c ^= b;
	c -= rotl32(b, 24);

	return c;
}

// Hashes five 32-bit keys (k0.xyz followed by k1.xy) to one 32-bit value.
// All three state words start at 3735928559 (0xDEADBEEF) plus a term derived
// from the key count (5). The first three keys are added and mixed, then the
// remaining two are added to the first two words before finalization.
uint inthash5(in uint3 k0, in uint2 k1)
{
	uint3 state = 3735928559 + (5 << 2) + 13;

	// First three keys, followed by a full mixing pass.
	state += k0;
	bjmix(state.x, state.y, state.z);

	// Remaining two keys.
	state.xy += k1;

	return bjfinal(state.x, state.y, state.z);
}

// Hashes four 32-bit keys (k0.xyz followed by k1) to one 32-bit value.
// All three state words start at 3735928559 (0xDEADBEEF) plus a term derived
// from the key count (4). The first three keys are added and mixed, then the
// fourth is added to the first word before finalization.
uint inthash4(in uint3 k0, in uint k1)
{
	uint3 state = 3735928559 + (4 << 2) + 13;

	// First three keys, followed by a full mixing pass.
	state += k0;
	bjmix(state.x, state.y, state.z);

	// Remaining key.
	state.x += k1;

	return bjfinal(state.x, state.y, state.z);
}

// Hashes three 32-bit keys to one 32-bit value. All three state words start
// at 3735928559 (0xDEADBEEF) plus a term derived from the key count (3); the
// keys are added component-wise and the state is finalized without a mix pass.
uint inthash3(in uint3 k0)
{
	uint3 state = 3735928559 + (3 << 2) + 13;
	state += k0;

	return bjfinal(state.x, state.y, state.z);
}

// Hashes two 32-bit keys to one 32-bit value. All three state words start at
// 3735928559 (0xDEADBEEF) plus a term derived from the key count (2); the keys
// are added to the first two words and the state is finalized.
uint inthash2(in uint2 k0)
{
	uint3 state = 3735928559 + (2 << 2) + 13;
	state.xy += k0;

	return bjfinal(state.x, state.y, state.z);
}

// Hashes one 32-bit key to one 32-bit value. All three state words start at
// 3735928559 (0xDEADBEEF) plus a term derived from the key count (1); the key
// is added to the first word and the state is finalized.
uint inthash1(in uint k0)
{
	uint3 state = 3735928559 + (1 << 2) + 13;
	state.x += k0;

	return bjfinal(state.x, state.y, state.z);
}

// 1D cell noise with scalar output: constant over each integer cell of k.
// The floored coordinate is hashed and the hash is mapped through bits_to_01.
float cellnoise_1_1(float k)
{
	const uint cell = qfloor1(k);
	const uint hashed = inthash1(cell);
	return bits_to_01(hashed);
}

// 2D cell noise with scalar output: constant over each integer cell of k.
// The floored coordinates are hashed and the hash is mapped through bits_to_01.
float cellnoise_1_2(float2 k)
{
	const uint2 cell = qfloor2(k);
	const uint hashed = inthash2(cell);
	return bits_to_01(hashed);
}

// 3D cell noise with scalar output: constant over each integer cell of k.
// The floored coordinates are hashed and the hash is mapped through bits_to_01.
float cellnoise_1_3(float3 k)
{
	const uint3 cell = qfloor3(k);
	const uint hashed = inthash3(cell);
	return bits_to_01(hashed);
}

// 4D cell noise with scalar output: constant over each integer cell of k.
// The floored xyz and w coordinates are hashed together with inthash4 and the
// hash is mapped through bits_to_01.
float cellnoise_1_4(float4 k)
{
	const uint4 cell = qfloor4(k);
	const uint hashed = inthash4(cell.xyz, cell.w);
	return bits_to_01(hashed);
}

// 1D cell noise with float3 output. Each output channel hashes the floored
// coordinate together with the channel index (0, 1, 2) as an extra key.
float3 cellnoise_3_1(float k)
{
	const uint cell = qfloor1(k);

	return float3(
		bits_to_01(inthash2(uint2(cell, 0))),
		bits_to_01(inthash2(uint2(cell, 1))),
		bits_to_01(inthash2(uint2(cell, 2))));
}

// 2D cell noise with float3 output. Each output channel hashes the floored
// coordinates together with the channel index (0, 1, 2) as an extra key.
float3 cellnoise_3_2(float2 k)
{
	const uint2 cell = qfloor2(k.xy);

	return float3(
		bits_to_01(inthash3(uint3(cell, 0))),
		bits_to_01(inthash3(uint3(cell, 1))),
		bits_to_01(inthash3(uint3(cell, 2))));
}

// 3D cell noise with float3 output. Each output channel hashes the floored
// coordinates together with the channel index (0, 1, 2) as a fourth key.
float3 cellnoise_3_3(float3 k)
{
	const uint3 cell = qfloor3(k);

	float3 channels;
	channels.x = bits_to_01(inthash4(cell, 0));
	channels.y = bits_to_01(inthash4(cell, 1));
	channels.z = bits_to_01(inthash4(cell, 2));
	return channels;
}

// 4D cell noise with float3 output. Each output channel hashes the floored
// xyz coordinates, the floored w coordinate and the channel index (0, 1, 2)
// with the five-key hash.
float3 cellnoise_3_4(float4 k)
{
	const uint3 cell_xyz = qfloor3(k.xyz);
	const uint  cell_w   = qfloor1(k.w);

	float3 channels;
	channels.x = bits_to_01(inthash5(cell_xyz, uint2(cell_w, 0)));
	channels.y = bits_to_01(inthash5(cell_xyz, uint2(cell_w, 1)));
	channels.z = bits_to_01(inthash5(cell_xyz, uint2(cell_w, 2)));
	return channels;
}

// perlin noise implementation

// Splits x into its integer floor (written to i) and the remainder
// x - floor(x), which is returned.
float floorfrac1(in float x, out int i)
{
	const int whole = floor(x);
	i = whole;
	return x - whole;
}

// Component-wise split of x into its integer floor (written to i) and the
// remainder x - floor(x), which is returned.
float2 floorfrac2(in float2 x, out int2 i)
{
	const int2 whole = floor(x);
	i = whole;
	return x - whole;
}

// Component-wise split of x into its integer floor (written to i) and the
// remainder x - floor(x), which is returned.
float3 floorfrac3(in float3 x, out int3 i)
{
	const int3 whole = floor(x);
	i = whole;
	return x - whole;
}

// Component-wise split of x into its integer floor (written to i) and the
// remainder x - floor(x), which is returned.
float4 floorfrac4(in float4 x, out int4 i)
{
	const int4 whole = floor(x);
	i = whole;
	return x - whole;
}

// Quintic interpolation curve 6t^5 - 15t^4 + 10t^3, evaluated in nested form.
// Maps 0 to 0 and 1 to 1.
float fade1(in float t)
{
	const float poly = t * (t * 6.0f - 15.0f) + 10.0f;
	return t * t * t * poly;
}

// Component-wise quintic interpolation curve 6t^5 - 15t^4 + 10t^3.
float2 fade2(in float2 t)
{
	const float2 poly = t * (t * 6.0f - (float2)15.0f) + (float2)10.0f;
	return t * t * t * poly;
}

// Component-wise quintic interpolation curve 6t^5 - 15t^4 + 10t^3.
float3 fade3(in float3 t)
{
	const float3 poly = t * (t * 6.0f - (float3)15.0f) + (float3)10.0f;
	return t * t * t * poly;
}

// Component-wise quintic interpolation curve 6t^5 - 15t^4 + 10t^3.
float4 fade4(in float4 t)
{
	const float4 poly = t * (t * 6.0f - (float4)15.0f) + (float4)10.0f;
	return t * t * t * poly;
}

// 1D gradient contribution. The low four bits of hash select a slope:
// bits 0-2 give a magnitude of 1..8 and bit 3 its sign. Returns slope * x.
float grad1(in int hash, in float x)
{
	const int   bits  = hash & 15;
	const float slope = 1 + (bits & 7);

	return ((bits & 8) ? -slope : slope) * x;
}

// 2D gradient contribution selected by the low three bits of hash.
// Bit 2 chooses which coordinate is weighted by 1 and which by 2; bits 0 and 1
// negate the first and second term respectively.
float grad2(in int hash, in float x, in float y)
{
	const int bits = hash & 7;

	float u;
	float v;
	if (bits < 4)
	{
		u = x;
		v = 2.0f * y;
	}
	else
	{
		u = y;
		v = 2.0f * x;
	}

	if (bits & 1) u = -u;
	if (bits & 2) v = -v;

	return u + v;
}

// 3D gradient contribution selected by the low four bits of hash.
// Two of the three coordinates are picked from the hash bits and each is
// optionally negated (bit 0 for the first, bit 1 for the second); the sum is
// returned.
float grad3(in int hash, in float x, in float y, in float z)
{
	const int bits = hash & 15;

	// First term: x for bits 0..7, y otherwise.
	float u = x;
	if (bits >= 8) u = y;

	// Second term: y for bits 0..3, x for 12 and 14, z for everything else.
	float v;
	if (bits < 4)
		v = y;
	else if (bits == 12 || bits == 14)
		v = x;
	else
		v = z;

	if (bits & 1) u = -u;
	if (bits & 2) v = -v;

	return u + v;
}

// 4D gradient contribution selected by the low five bits of hash.
// Three of the four coordinates are picked from the hash bits and each is
// optionally negated (bits 0, 1, 2 in order); the sum is returned.
float grad4(in int hash, in float x, in float y, in float z, in float w)
{
	const int bits = hash & 31;

	float u = y;
	if (bits < 24) u = x;

	float v = z;
	if (bits < 16) v = y;

	float s = w;
	if (bits < 8) s = z;

	if (bits & 1) u = -u;
	if (bits & 2) v = -v;
	if (bits & 4) s = -s;

	return u + v + s;
}

// Evaluates grad1 once per component of hash, using the same x for all three.
float3 vgrad1(in int3 hash, float x)
{
	float3 g;
	g.x = grad1(hash.x, x);
	g.y = grad1(hash.y, x);
	g.z = grad1(hash.z, x);
	return g;
}

// Evaluates grad2 once per component of hash, using the same (x, y) for all three.
float3 vgrad2(in int3 hash, float x, float y)
{
	float3 g;
	g.x = grad2(hash.x, x, y);
	g.y = grad2(hash.y, x, y);
	g.z = grad2(hash.z, x, y);
	return g;
}

// Evaluates grad3 once per component of hash, using the same (x, y, z) for all three.
float3 vgrad3(in int3 hash, float x, float y, float z)
{
	float3 g;
	g.x = grad3(hash.x, x, y, z);
	g.y = grad3(hash.y, x, y, z);
	g.z = grad3(hash.z, x, y, z);
	return g;
}

// Evaluates grad4 once per component of hash, using the same (x, y, z, w) for all three.
float3 vgrad4(in int3 hash, float x, float y, float z, float w)
{
	float3 g;
	g.x = grad4(hash.x, x, y, z, w);
	g.y = grad4(hash.y, x, y, z, w);
	g.z = grad4(hash.z, x, y, z, w);
	return g;
}

// Per-dimension output scale factors applied to the scalar Perlin results
// (scaleN is used by perlin_1_N).
float scale1(in float result)
{
	const float factor = 0.2500f;
	return factor * result;
}
float scale2(in float result)
{
	const float factor = 0.6616f;
	return factor * result;
}
float scale3(in float result)
{
	const float factor = 0.9820f;
	return factor * result;
}
float scale4(in float result)
{
	const float factor = 0.8344f;
	return factor * result;
}

// Per-dimension output scale factors applied to the float3 Perlin results
// (scaleNv is used by perlin_3_N). Same factors as the scalar scaleN helpers.
float3 scale1v(in float3 result)
{
	const float factor = 0.2500f;
	return factor * result;
}
float3 scale2v(in float3 result)
{
	const float factor = 0.6616f;
	return factor * result;
}
float3 scale3v(in float3 result)
{
	const float factor = 0.9820f;
	return factor * result;
}
float3 scale4v(in float3 result)
{
	const float factor = 0.8344f;
	return factor * result;
}

// Bilinear interpolation of four values: v0..v1 and v2..v3 are blended by s,
// then the two results are blended by t.
float bilerp(in float v0, in float v1, in float v2, in float v3, in float s, in float t)
{
	return lerp(lerp(v0, v1, s), lerp(v2, v3, s), t);
}

// float3 bilinear interpolation: v0..v1 and v2..v3 are blended by the scalar
// s, then the two results are blended by the scalar t.
float3 bilerpv(in float3 v0, in float3 v1, in float3 v2, in float3 v3, in float s, in float t)
{
	return lerp(lerp(v0, v1, s), lerp(v2, v3, s), t);
}

// Trilinear interpolation of eight values. Pairs (v0,v1), (v2,v3), (v4,v5),
// (v6,v7) are blended by s; the first two and last two results are blended by
// t; the final two results are blended by r.
float trilerp(in float v0, in float v1, in float v2, in float v3, in float v4, in float v5, in float v6, in float v7, in float s, in float t, in float r)
{
	// Bilinear blend of v0..v3 (the r = 0 side).
	const float lo = lerp(lerp(v0, v1, s), lerp(v2, v3, s), t);
	// Bilinear blend of v4..v7 (the r = 1 side).
	const float hi = lerp(lerp(v4, v5, s), lerp(v6, v7, s), t);

	return lerp(lo, hi, r);
}

// float3 trilinear interpolation of eight values. Pairs (v0,v1), (v2,v3),
// (v4,v5), (v6,v7) are blended by s; the first two and last two results are
// blended by t; the final two results are blended by r.
float3 trilerpv(in float3 v0, in float3 v1, in float3 v2, in float3 v3, in float3 v4, in float3 v5, in float3 v6, in float3 v7, in float s, in float t, in float r)
{
	// Bilinear blend of v0..v3 (the r = 0 side).
	const float3 lo = lerp(lerp(v0, v1, s), lerp(v2, v3, s), t);
	// Bilinear blend of v4..v7 (the r = 1 side).
	const float3 hi = lerp(lerp(v4, v5, s), lerp(v6, v7, s), t);

	return lerp(lo, hi, r);
}

// Hashes one signed lattice coordinate: the bits are reinterpreted as uint,
// run through inthash1, and the result is reinterpreted as int.
int phash1(in int k)
{
	const uint key = asuint(k);
	const uint hashed = inthash1(key);
	return asint(hashed);
}

// Hashes two signed lattice coordinates: the bits are reinterpreted as uint,
// run through inthash2, and the result is reinterpreted as int.
int phash2(in int2 k)
{
	const uint2 key = asuint(k);
	const uint hashed = inthash2(key);
	return asint(hashed);
}

// Hashes three signed lattice coordinates: the bits are reinterpreted as uint,
// run through inthash3, and the result is reinterpreted as int.
int phash3(in int3 k)
{
	const uint3 key = asuint(k);
	const uint hashed = inthash3(key);
	return asint(hashed);
}

// Hashes four signed lattice coordinates: the bits are reinterpreted as uint,
// run through inthash4 (xyz plus w), and the result is reinterpreted as int.
int phash4(in int4 k)
{
	return asint(inthash4(asuint(k.xyz), asuint(k.w)));
}

// Hashes one lattice coordinate and splits the 32-bit hash into three 8-bit
// values: bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z.
int3 vhash1(in int k)
{
	const uint bits = inthash1(asuint(k));

	return int3(bits & 0xFF, (bits >> 8) & 0xFF, (bits >> 16) & 0xFF);
}

// Hashes two lattice coordinates and splits the 32-bit hash into three 8-bit
// values: bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z.
int3 vhash2(in int2 k)
{
	const uint bits = inthash2(asuint(k));

	return int3(bits & 0xFF, (bits >> 8) & 0xFF, (bits >> 16) & 0xFF);
}

// Hashes three lattice coordinates and splits the 32-bit hash into three 8-bit
// values: bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z.
int3 vhash3(in int3 k)
{
	const uint bits = inthash3(asuint(k));

	return int3(bits & 0xFF, (bits >> 8) & 0xFF, (bits >> 16) & 0xFF);
}

// Hashes four lattice coordinates and splits the 32-bit hash into three 8-bit
// values: bits 0-7 -> x, bits 8-15 -> y, bits 16-23 -> z.
int3 vhash4(in int4 k)
{
	const uint bits = inthash4(asuint(k.xyz), asuint(k.w));

	return int3(bits & 0xFF, (bits >> 8) & 0xFF, (bits >> 16) & 0xFF);
}

// 1D gradient (Perlin) noise with scalar output.
// The gradient contributions of the two lattice points surrounding x are
// blended with the fade curve and the result is scaled by scale1.
float perlin_1_1(in float x)
{
	int cell;
	const float p0 = floorfrac1(x, cell);  // offset from the lower lattice point
	const float p1 = p0 - 1.0f;            // offset from the upper lattice point
	const float w  = fade1(p0);

	const float g0 = grad1(phash1(cell), p0);
	const float g1 = grad1(phash1(cell + 1), p1);

	return scale1(lerp(g0, g1, w));
}

// 2D gradient (Perlin) noise with scalar output.
// The gradient contributions of the four corners of the lattice cell
// containing x are bilinearly blended with the fade curve and scaled by scale2.
float perlin_1_2(in float2 x)
{
	int2 cell;
	const float2 p0 = floorfrac2(x, cell);    // offset from the low corner
	const float2 p1 = p0 - (float2)1.0f;      // offset from the high corner
	const float2 w  = fade2(p0);

	// Suffix digits are the corner's (x, y) lattice offsets.
	const float g00 = grad2(phash2(cell), p0.x, p0.y);
	const float g10 = grad2(phash2(cell + int2(1, 0)), p1.x, p0.y);
	const float g01 = grad2(phash2(cell + int2(0, 1)), p0.x, p1.y);
	const float g11 = grad2(phash2(cell + int2(1, 1)), p1.x, p1.y);

	return scale2(bilerp(g00, g10, g01, g11, w.x, w.y));
}

// 3D gradient (Perlin) noise with scalar output.
// The gradient contributions of the eight corners of the lattice cell
// containing x are trilinearly blended with the fade curve and scaled by scale3.
float perlin_1_3(in float3 x)
{
	int3 cell;
	const float3 p0 = floorfrac3(x, cell);    // offset from the low corner
	const float3 p1 = p0 - (float3)1.0f;      // offset from the high corner
	const float3 w  = fade3(p0);

	// Suffix digits are the corner's (x, y, z) lattice offsets.
	const float g000 = grad3(phash3(cell), p0.x, p0.y, p0.z);
	const float g100 = grad3(phash3(cell + int3(1, 0, 0)), p1.x, p0.y, p0.z);
	const float g010 = grad3(phash3(cell + int3(0, 1, 0)), p0.x, p1.y, p0.z);
	const float g110 = grad3(phash3(cell + int3(1, 1, 0)), p1.x, p1.y, p0.z);
	const float g001 = grad3(phash3(cell + int3(0, 0, 1)), p0.x, p0.y, p1.z);
	const float g101 = grad3(phash3(cell + int3(1, 0, 1)), p1.x, p0.y, p1.z);
	const float g011 = grad3(phash3(cell + int3(0, 1, 1)), p0.x, p1.y, p1.z);
	const float g111 = grad3(phash3(cell + int3(1, 1, 1)), p1.x, p1.y, p1.z);

	return scale3(trilerp(g000, g100, g010, g110, g001, g101, g011, g111, w.x, w.y, w.z));
}

// 4D gradient (Perlin) noise with scalar output.
// The gradient contributions of the sixteen corners of the lattice cell
// containing x are blended: trilinearly in xyz for each of the two w slices,
// then linearly in w. The result is scaled by scale4.
float perlin_1_4(in float4 x)
{
	int4 cell;
	const float4 p0 = floorfrac4(x, cell);    // offset from the low corner
	const float4 p1 = p0 - (float4)1.0f;      // offset from the high corner
	const float4 w  = fade4(p0);

	// Suffix digits are the corner's (x, y, z, w) lattice offsets.
	// Slice with w offset 0.
	const float g0000 = grad4(phash4(cell), p0.x, p0.y, p0.z, p0.w);
	const float g1000 = grad4(phash4(cell + int4(1, 0, 0, 0)), p1.x, p0.y, p0.z, p0.w);
	const float g0100 = grad4(phash4(cell + int4(0, 1, 0, 0)), p0.x, p1.y, p0.z, p0.w);
	const float g1100 = grad4(phash4(cell + int4(1, 1, 0, 0)), p1.x, p1.y, p0.z, p0.w);
	const float g0010 = grad4(phash4(cell + int4(0, 0, 1, 0)), p0.x, p0.y, p1.z, p0.w);
	const float g1010 = grad4(phash4(cell + int4(1, 0, 1, 0)), p1.x, p0.y, p1.z, p0.w);
	const float g0110 = grad4(phash4(cell + int4(0, 1, 1, 0)), p0.x, p1.y, p1.z, p0.w);
	const float g1110 = grad4(phash4(cell + int4(1, 1, 1, 0)), p1.x, p1.y, p1.z, p0.w);
	const float slice0 = trilerp(g0000, g1000, g0100, g1100, g0010, g1010, g0110, g1110, w.x, w.y, w.z);

	// Slice with w offset 1.
	const float g0001 = grad4(phash4(cell + int4(0, 0, 0, 1)), p0.x, p0.y, p0.z, p1.w);
	const float g1001 = grad4(phash4(cell + int4(1, 0, 0, 1)), p1.x, p0.y, p0.z, p1.w);
	const float g0101 = grad4(phash4(cell + int4(0, 1, 0, 1)), p0.x, p1.y, p0.z, p1.w);
	const float g1101 = grad4(phash4(cell + int4(1, 1, 0, 1)), p1.x, p1.y, p0.z, p1.w);
	const float g0011 = grad4(phash4(cell + int4(0, 0, 1, 1)), p0.x, p0.y, p1.z, p1.w);
	const float g1011 = grad4(phash4(cell + int4(1, 0, 1, 1)), p1.x, p0.y, p1.z, p1.w);
	const float g0111 = grad4(phash4(cell + int4(0, 1, 1, 1)), p0.x, p1.y, p1.z, p1.w);
	const float g1111 = grad4(phash4(cell + int4(1, 1, 1, 1)), p1.x, p1.y, p1.z, p1.w);
	const float slice1 = trilerp(g0001, g1001, g0101, g1101, g0011, g1011, g0111, g1111, w.x, w.y, w.z);

	return scale4(lerp(slice0, slice1, w.w));
}

// 1D gradient (Perlin) noise with float3 output.
// Each lattice point supplies three hash bytes (vhash1), one per output
// channel; the per-channel gradient contributions are blended with the fade
// curve and scaled by scale1v.
float3 perlin_3_1(in float x)
{
	int cell;
	const float p0 = floorfrac1(x, cell);  // offset from the lower lattice point
	const float p1 = p0 - 1.0f;            // offset from the upper lattice point
	const float w  = fade1(p0);

	const float3 g0 = vgrad1(vhash1(cell), p0);
	const float3 g1 = vgrad1(vhash1(cell + 1), p1);

	return scale1v(lerp(g0, g1, w));
}

// 2D gradient (Perlin) noise with float3 output.
// Each of the four cell corners supplies three hash bytes (vhash2), one per
// output channel; the per-channel gradient contributions are bilinearly
// blended with the fade curve and scaled by scale2v.
float3 perlin_3_2(in float2 x)
{
	int2 cell;
	const float2 p0 = floorfrac2(x, cell);    // offset from the low corner
	const float2 p1 = p0 - (float2)1.0f;      // offset from the high corner
	const float2 w  = fade2(p0);

	// Suffix digits are the corner's (x, y) lattice offsets.
	const float3 g00 = vgrad2(vhash2(cell), p0.x, p0.y);
	const float3 g10 = vgrad2(vhash2(cell + int2(1, 0)), p1.x, p0.y);
	const float3 g01 = vgrad2(vhash2(cell + int2(0, 1)), p0.x, p1.y);
	const float3 g11 = vgrad2(vhash2(cell + int2(1, 1)), p1.x, p1.y);

	return scale2v(bilerpv(g00, g10, g01, g11, w.x, w.y));
}

// 3D gradient (Perlin) noise with float3 output.
// Each of the eight cell corners supplies three hash bytes (vhash3), one per
// output channel; the per-channel gradient contributions are trilinearly
// blended with the fade curve and scaled by scale3v.
float3 perlin_3_3(in float3 x)
{
	int3 cell;
	const float3 p0 = floorfrac3(x, cell);    // offset from the low corner
	const float3 p1 = p0 - (float3)1.0f;      // offset from the high corner
	const float3 w  = fade3(p0);

	// Suffix digits are the corner's (x, y, z) lattice offsets.
	const float3 g000 = vgrad3(vhash3(cell), p0.x, p0.y, p0.z);
	const float3 g100 = vgrad3(vhash3(cell + int3(1, 0, 0)), p1.x, p0.y, p0.z);
	const float3 g010 = vgrad3(vhash3(cell + int3(0, 1, 0)), p0.x, p1.y, p0.z);
	const float3 g110 = vgrad3(vhash3(cell + int3(1, 1, 0)), p1.x, p1.y, p0.z);
	const float3 g001 = vgrad3(vhash3(cell + int3(0, 0, 1)), p0.x, p0.y, p1.z);
	const float3 g101 = vgrad3(vhash3(cell + int3(1, 0, 1)), p1.x, p0.y, p1.z);
	const float3 g011 = vgrad3(vhash3(cell + int3(0, 1, 1)), p0.x, p1.y, p1.z);
	const float3 g111 = vgrad3(vhash3(cell + int3(1, 1, 1)), p1.x, p1.y, p1.z);

	return scale3v(trilerpv(g000, g100, g010, g110, g001, g101, g011, g111, w.x, w.y, w.z));
}

// 4D gradient (Perlin) noise with float3 output.
// Each of the sixteen cell corners supplies three hash bytes (vhash4), one per
// output channel. Contributions are blended trilinearly in xyz for each of the
// two w slices, then linearly in w, and the result is scaled by scale4v.
float3 perlin_3_4(in float4 x)
{
	int4 cell;
	const float4 p0 = floorfrac4(x, cell);    // offset from the low corner
	const float4 p1 = p0 - (float4)1.0f;      // offset from the high corner
	const float4 w  = fade4(p0);

	// Suffix digits are the corner's (x, y, z, w) lattice offsets.
	// Slice with w offset 0.
	const float3 g0000 = vgrad4(vhash4(cell), p0.x, p0.y, p0.z, p0.w);
	const float3 g1000 = vgrad4(vhash4(cell + int4(1, 0, 0, 0)), p1.x, p0.y, p0.z, p0.w);
	const float3 g0100 = vgrad4(vhash4(cell + int4(0, 1, 0, 0)), p0.x, p1.y, p0.z, p0.w);
	const float3 g1100 = vgrad4(vhash4(cell + int4(1, 1, 0, 0)), p1.x, p1.y, p0.z, p0.w);
	const float3 g0010 = vgrad4(vhash4(cell + int4(0, 0, 1, 0)), p0.x, p0.y, p1.z, p0.w);
	const float3 g1010 = vgrad4(vhash4(cell + int4(1, 0, 1, 0)), p1.x, p0.y, p1.z, p0.w);
	const float3 g0110 = vgrad4(vhash4(cell + int4(0, 1, 1, 0)), p0.x, p1.y, p1.z, p0.w);
	const float3 g1110 = vgrad4(vhash4(cell + int4(1, 1, 1, 0)), p1.x, p1.y, p1.z, p0.w);
	const float3 slice0 = trilerpv(g0000, g1000, g0100, g1100, g0010, g1010, g0110, g1110, w.x, w.y, w.z);

	// Slice with w offset 1.
	const float3 g0001 = vgrad4(vhash4(cell + int4(0, 0, 0, 1)), p0.x, p0.y, p0.z, p1.w);
	const float3 g1001 = vgrad4(vhash4(cell + int4(1, 0, 0, 1)), p1.x, p0.y, p0.z, p1.w);
	const float3 g0101 = vgrad4(vhash4(cell + int4(0, 1, 0, 1)), p0.x, p1.y, p0.z, p1.w);
	const float3 g1101 = vgrad4(vhash4(cell + int4(1, 1, 0, 1)), p1.x, p1.y, p0.z, p1.w);
	const float3 g0011 = vgrad4(vhash4(cell + int4(0, 0, 1, 1)), p0.x, p0.y, p1.z, p1.w);
	const float3 g1011 = vgrad4(vhash4(cell + int4(1, 0, 1, 1)), p1.x, p0.y, p1.z, p1.w);
	const float3 g0111 = vgrad4(vhash4(cell + int4(0, 1, 1, 1)), p0.x, p1.y, p1.z, p1.w);
	const float3 g1111 = vgrad4(vhash4(cell + int4(1, 1, 1, 1)), p1.x, p1.y, p1.z, p1.w);
	const float3 slice1 = trilerpv(g0001, g1001, g0101, g1101, g0011, g1011, g0111, g1111, w.x, w.y, w.z);

	return scale4v(lerp(slice0, slice1, w.w));
}

// hash noise implementation

// 1D hash noise with scalar output: hashes the raw bit pattern of k (no
// flooring) and maps the hash through bits_to_01.
float hashnoise_1_1(float k)
{
	const uint key = asuint(k);
	const uint hashed = inthash1(key);
	return bits_to_01(hashed);
}

// 2D hash noise with scalar output: hashes the raw bit patterns of k (no
// flooring) and maps the hash through bits_to_01.
float hashnoise_1_2(float2 k)
{
	const uint2 key = asuint(k);
	const uint hashed = inthash2(key);
	return bits_to_01(hashed);
}

// 3D hash noise with scalar output: hashes the raw bit patterns of k (no
// flooring) and maps the hash through bits_to_01.
float hashnoise_1_3(float3 k)
{
	const uint3 key = asuint(k);
	const uint hashed = inthash3(key);
	return bits_to_01(hashed);
}

// 4D hash noise with scalar output: hashes the raw bit patterns of k (no
// flooring) with inthash4 and maps the hash through bits_to_01.
float hashnoise_1_4(float4 k)
{
	const uint hashed = inthash4(asuint(k.xyz), asuint(k.w));
	return bits_to_01(hashed);
}

// 1D hash noise with float3 output. Each output channel hashes the raw bit
// pattern of k together with the channel index (0, 1, 2) as an extra key.
float3 hashnoise_3_1(float k)
{
	const uint key = asuint(k);

	return float3(
		bits_to_01(inthash2(uint2(key, 0))),
		bits_to_01(inthash2(uint2(key, 1))),
		bits_to_01(inthash2(uint2(key, 2))));
}

// 2D hash noise with float3 output. Each output channel hashes the raw bit
// patterns of k together with the channel index (0, 1, 2) as an extra key.
float3 hashnoise_3_2(float2 k)
{
	const uint2 key = asuint(k.xy);

	return float3(
		bits_to_01(inthash3(uint3(key, 0))),
		bits_to_01(inthash3(uint3(key, 1))),
		bits_to_01(inthash3(uint3(key, 2))));
}

// 3D hash noise with float3 output. Each output channel hashes the raw bit
// patterns of k together with the channel index (0, 1, 2) as a fourth key.
float3 hashnoise_3_3(float3 k)
{
	const uint3 key = asuint(k);

	float3 channels;
	channels.x = bits_to_01(inthash4(key, 0));
	channels.y = bits_to_01(inthash4(key, 1));
	channels.z = bits_to_01(inthash4(key, 2));
	return channels;
}

// 4D hash noise with float3 output. Each output channel hashes the raw bit
// patterns of k.xyz and k.w together with the channel index (0, 1, 2) using
// the five-key hash.
float3 hashnoise_3_4(float4 k)
{
	const uint3 key_xyz = asuint(k.xyz);
	const uint  key_w   = asuint(k.w);

	float3 channels;
	channels.x = bits_to_01(inthash5(key_xyz, uint2(key_w, 0)));
	channels.y = bits_to_01(inthash5(key_xyz, uint2(key_w, 1)));
	channels.z = bits_to_01(inthash5(key_xyz, uint2(key_w, 2)));
	return channels;
}

// simplex noise implementation

// Eight 2D gradient vectors for simplex noise; sgrad2 selects one with
// `h & 7`.
// Declared const because the table is read-only: nothing in the visible code
// writes to it, and const lets the compiler treat it as immutable data.
static const float2 grad2lut[8] = {
	{ -1.0f, -1.0f },{ 1.0f,  0.0f },{ -1.0f, 0.0f },{ 1.0f,  1.0f },
	{ -1.0f,  1.0f },{ 0.0f, -1.0f },{ 0.0f, 1.0f },{ 1.0f, -1.0f }
};

// Sixteen 3D gradient vectors for simplex noise; sgrad3 selects one with
// `h & 15`. The last four entries repeat entries 0, 2, 5 and 7, which pads the
// table to 16.
// Declared const because the table is read-only: nothing in the visible code
// writes to it, and const lets the compiler treat it as immutable data.
static const float3 grad3lut[16] = {
	{ 1.0f,  0.0f,  1.0f },{ 0.0f,  1.0f,  1.0f }, 
	{ -1.0f,  0.0f,  1.0f },{ 0.0f, -1.0f,  1.0f },
	{ 1.0f,  0.0f, -1.0f },{ 0.0f,  1.0f, -1.0f },
	{ -1.0f,  0.0f, -1.0f },{ 0.0f, -1.0f, -1.0f },
	{ 1.0f, -1.0f,  0.0f },{ 1.0f,  1.0f,  0.0f },
	{ -1.0f,  1.0f,  0.0f },{ -1.0f, -1.0f,  0.0f },
	{ 1.0f,  0.0f,  1.0f },{ -1.0f,  0.0f,  1.0f },
	{ 0.0f,  1.0f, -1.0f },{ 0.0f, -1.0f, -1.0f }
};

// Thirty-two 4D gradient vectors for simplex noise; sgrad4 selects one with
// `h & 31`. Every entry has exactly one zero component and +/-1 in the others.
// Declared const because the table is read-only: nothing in the visible code
// writes to it, and const lets the compiler treat it as immutable data.
static const float4 grad4lut[32] = {
	{ 0.0f, 1.0f, 1.0f, 1.0f },{ 0.0f, 1.0f, 1.0f, -1.0f },{ 0.0f, 1.0f, -1.0f, 1.0f },{ 0.0f, 1.0f, -1.0f, -1.0f },
	{ 0.0f, -1.0f, 1.0f, 1.0f },{ 0.0f, -1.0f, 1.0f, -1.0f },{ 0.0f, -1.0f, -1.0f, 1.0f },{ 0.0f, -1.0f, -1.0f, -1.0f },
	{ 1.0f, 0.0f, 1.0f, 1.0f },{ 1.0f, 0.0f, 1.0f, -1.0f },{ 1.0f, 0.0f, -1.0f, 1.0f },{ 1.0f, 0.0f, -1.0f, -1.0f },
	{ -1.0f, 0.0f, 1.0f, 1.0f },{ -1.0f, 0.0f, 1.0f, -1.0f },{ -1.0f, 0.0f, -1.0f, 1.0f },{ -1.0f, 0.0f, -1.0f, -1.0f },
	{ 1.0f, 1.0f, 0.0f, 1.0f },{ 1.0f, 1.0f, 0.0f, -1.0f },{ 1.0f, -1.0f, 0.0f, 1.0f },{ 1.0f, -1.0f, 0.0f, -1.0f },
	{ -1.0f, 1.0f, 0.0f, 1.0f },{ -1.0f, 1.0f, 0.0f, -1.0f },{ -1.0f, -1.0f, 0.0f, 1.0f },{ -1.0f, -1.0f, 0.0f, -1.0f },
	{ 1.0f, 1.0f, 1.0f, 0.0f },{ 1.0f, 1.0f, -1.0f, 0.0f },{ 1.0f, -1.0f, 1.0f, 0.0f },{ 1.0f, -1.0f, -1.0f, 0.0f },
	{ -1.0f, 1.0f, 1.0f, 0.0f },{ -1.0f, 1.0f, -1.0f, 0.0f },{ -1.0f, -1.0f, 1.0f, 0.0f },{ -1.0f, -1.0f, -1.0f, 0.0f }
};

// Traversal-order table for 4D simplex noise. simplexnoise_1_4 indexes it with
// a 6-bit code built from the six pairwise `>` comparisons of the offset's
// components; each row holds one value in 0..3 per component (x, y, z, w),
// which the caller thresholds (>= 3, >= 2, >= 1) to get the lattice steps to
// the intermediate corners.
// NOTE(review): the all-zero rows presumably correspond to comparison codes
// that cannot occur (contradictory orderings) - confirm.
// Declared const because the table is read-only: nothing in the visible code
// writes to it, and const lets the compiler treat it as immutable data.
static const uint simplex[64][4] = {
	{ 0,1,2,3 },{ 0,1,3,2 },{ 0,0,0,0 },{ 0,2,3,1 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 1,2,3,0 },
	{ 0,2,1,3 },{ 0,0,0,0 },{ 0,3,1,2 },{ 0,3,2,1 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 1,3,2,0 },
	{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },
	{ 1,2,0,3 },{ 0,0,0,0 },{ 1,3,0,2 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 2,3,0,1 },{ 2,3,1,0 },
	{ 1,0,2,3 },{ 1,0,3,2 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 2,0,3,1 },{ 0,0,0,0 },{ 2,1,3,0 },
	{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },
	{ 2,0,1,3 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 3,0,1,2 },{ 3,0,2,1 },{ 0,0,0,0 },{ 3,1,2,0 },
	{ 2,1,0,3 },{ 0,0,0,0 },{ 0,0,0,0 },{ 0,0,0,0 },{ 3,1,0,2 },{ 0,0,0,0 },{ 3,2,0,1 },{ 3,2,1,0 } };

// Combines three 32-bit values into one hash: v2 is xor-ed with 3735928559
// (0xDEADBEEF) and the triple is passed through bjfinal.
uint scramble(uint v0, uint v1, uint v2)
{
	const uint salted = v2 ^ 3735928559;
	return bjfinal(v0, v1, salted);
}

// 1D simplex gradient for lattice point i and the given seed.
// The hash's bits 0-2 give a magnitude of 1..8 and bit 3 its sign.
float sgrad1(int i, int seed)
{
	const int   bits  = scramble(asuint(i), seed, 0);
	const float slope = 1.0f + (bits & 7);

	return (bits & 8) ? -slope : slope;
}

// 2D simplex gradient for lattice point i and the given seed: the low three
// bits of the scrambled coordinates select an entry of grad2lut.
float2 sgrad2(int2 i, int seed)
{
	const int bits = scramble(asuint(i.x), asuint(i.y), seed);
	return grad2lut[bits & 7];
}

// 3D simplex gradient for lattice point i and the given seed. z and the seed
// are scrambled first, then combined with x and y; the low four bits of the
// result select an entry of grad3lut.
float3 sgrad3(int3 i, int seed)
{
	const uint inner = scramble(asuint(i.z), seed, 0);
	const int  bits  = scramble(asuint(i.x), asuint(i.y), inner);
	return grad3lut[bits & 15];
}

// 4D simplex gradient for lattice point i and the given seed. z, w and the
// seed are scrambled first, then combined with x and y; the low five bits of
// the result select an entry of grad4lut.
float4 sgrad4(int4 i, int seed)
{
	const uint inner = scramble(asuint(i.z), asuint(i.w), seed);
	const int  bits  = scramble(asuint(i.x), asuint(i.y), inner);
	return grad4lut[bits & 31];
}

// 1D simplex noise with scalar output.
// Sums the contributions of the two lattice points around x; each is
// (1 - d^2)^4 * gradient * d, where d is the offset from that lattice point.
// Both contributions are computed together in a float2.
float simplexnoise_1_1(float x, int seed)
{
	const int cell = floor(x);

	// Offsets from the lower (.x) and upper (.y) lattice points.
	float2 d;
	d.x = x - cell;
	d.y = d.x - 1.0f;

	const float2 falloff    = (float2)1.0f - d * d;
	const float2 falloff_sq = falloff * falloff;
	const float2 slope      = float2(sgrad1(cell, seed), sgrad1(cell + 1, seed));
	const float2 contrib    = falloff_sq * falloff_sq * slope * d;

	const float scale = 0.36f;
	return scale * (contrib.x + contrib.y);
}

// 2D simplex noise with scalar output.
// The input is skewed onto the simplex grid, the containing cell and the
// middle corner of its triangle are found, and the contributions of the three
// corners are summed. A corner contributes (0.5 - |d|^2)^4 * dot(gradient, d)
// when 0.5 - |d|^2 is non-negative, and nothing otherwise.
float simplexnoise_1_2(float2 x, int seed)
{
	const float2 F2 = (float2)0.366025403;   // skew factor, (sqrt(3) - 1) / 2
	const float2 G2 = (float2)0.211324865;   // unskew factor, (3 - sqrt(3)) / 6

	// Skew the input and take the integer cell that contains it.
	const float2 skewed = x + (float2)dot(x, F2);
	const int2 cell = floor(skewed);

	// Unskew the cell origin; d0 is the offset from that origin to x.
	const float unskew = dot(cell, G2);
	const float2 d0 = x - (cell - (float2)unskew);

	// Lattice step to the middle corner: along x when d0.x > d0.y, else along y.
	int2 step1 = int2(0, 1);
	if (d0.x > d0.y) {
		step1 = int2(1, 0);
	}

	// Offsets from the middle and the last corner.
	const float2 d1 = d0 - step1 + G2;
	const float2 d2 = d0 - (float2)1.0f + 2.0f * G2;

	// One slot per corner; corners out of range leave their slot at zero.
	float3 contrib = 0;

	const float w0 = 0.5f - dot(d0, d0);
	if (w0 >= 0.0f) {
		const float w0_sq = w0 * w0;
		contrib.x = w0_sq * w0_sq * dot(sgrad2(cell, seed), d0);
	}

	const float w1 = 0.5f - dot(d1, d1);
	if (w1 >= 0.0f) {
		const float w1_sq = w1 * w1;
		contrib.y = w1_sq * w1_sq * dot(sgrad2(cell + step1, seed), d1);
	}

	const float w2 = 0.5f - dot(d2, d2);
	if (w2 >= 0.0f) {
		const float w2_sq = w2 * w2;
		contrib.z = w2_sq * w2_sq * dot(sgrad2(cell + (int2)1, seed), d2);
	}

	const float scale = 64.0f;
	return scale * dot(contrib, (float3)1.f);
}

// 3D simplex noise with scalar output.
// The input is skewed onto the simplex grid, the containing cell and the two
// intermediate corners of its tetrahedron are found, and the contributions of
// the four corners are summed. A corner contributes
// (0.5 - |d|^2)^4 * dot(gradient, d) when 0.5 - |d|^2 is non-negative, and
// nothing otherwise.
float simplexnoise_1_3(float3 x, int seed)
{
	const float3 F3 = (float3)0.333333333;   // skew factor, 1/3
	const float3 G3 = (float3)0.166666667;   // unskew factor, 1/6

	// Skew the input and take the integer cell that contains it.
	float s = dot(x, F3);
	float3 ss = x + (float3)s;
	int3 i = floor(ss);

	// Unskew the cell origin; x0 is the offset from that origin to x.
	float t = dot(i, G3);
	float3 X0 = i - (float3)t;
	float3 x0 = x - X0;

	// Lattice steps to the second (i1) and third (i2) corners, chosen from the
	// relative ordering of x0's components.
	int3 i1;
	int3 i2;

	if (x0.x >= x0.y) {
		if (x0.y >= x0.z) {
			i1 = int3(1, 0, 0); i2 = int3(1, 1, 0);   // x >= y >= z
		}
		else if (x0.x >= x0.z) {
			i1 = int3(1, 0, 0); i2 = int3(1, 0, 1);   // x >= z > y
		}
		else {
			i1 = int3(0, 0, 1); i2 = int3(1, 0, 1);   // z > x >= y
		}
	}
	else {
		if (x0.y < x0.z) {
			i1 = int3(0, 0, 1); i2 = int3(0, 1, 1);   // z > y > x
		}
		else if (x0.x < x0.z) {
			i1 = int3(0, 1, 0); i2 = int3(0, 1, 1);   // y >= z > x
		}
		else {
			i1 = int3(0, 1, 0); i2 = int3(1, 1, 0);   // y > x >= z
		}
	}

	// Offsets from the second, third and last corner.
	float3 x1 = x0 - i1 + G3;
	float3 x2 = x0 - i2 + 2.0f * G3;
	float3 x3 = x0 - (float3)1.0f + 3.0f * G3;

	// One slot per corner; corners out of range leave their slot at zero.
	float4 n = 0.f;

	float t0 = 0.5f - dot(x0, x0);
	if (t0 >= 0.0f) {
		float3 g0 = sgrad3(i, seed);
		float t20 = t0 * t0;
		n.x = t20 * t20 * dot(g0, x0);
	}

	float t1 = 0.5f - dot(x1, x1);
	if (t1 >= 0.0f) {
		float3 g1 = sgrad3(i + i1, seed);
		float t21 = t1 * t1;
		n.y = t21 * t21 * dot(g1, x1);
	}

	float t2 = 0.5f - dot(x2, x2);
	if (t2 >= 0.0f) {
		float3 g2 = sgrad3(i + i2, seed);
		float t22 = t2 * t2;
		n.z = t22 * t22 * dot(g2, x2);
	}

	float t3 = 0.5f - dot(x3, x3);
	if (t3 >= 0.0f) {
		// Integer add, as in the 2D version. Adding a float3 here would send
		// the lattice coordinate through float and back, which is inexact once
		// |i| exceeds 2^24.
		float3 g3 = sgrad3(i + (int3)1, seed);
		float t23 = t3 * t3;
		n.w = t23 * t23 * dot(g3, x3);
	}

	const float scale = 68.0f;
	return scale * dot(n, (float4)1.f);
}

// 4D simplex noise with scalar output.
// The input is skewed onto the simplex grid, the containing cell and the three
// intermediate corners of its simplex are found via the `simplex` table, and
// the contributions of the five corners are summed. A corner contributes
// (0.5 - |d|^2)^4 * dot(gradient, d) when 0.5 - |d|^2 is non-negative, and
// nothing otherwise.
float simplexnoise_1_4(float4 x, int seed)
{
	const float4 F4 = (float4)0.309016994;   // skew factor, (sqrt(5) - 1) / 4
	const float4 G4 = (float4)0.138196601;   // unskew factor, (5 - sqrt(5)) / 20

	// Contributions of corners 0..3 (n) and of the last corner (n1).
	float4 n = 0.f;
	float n1 = 0.f;

	// Skew the input and take the integer cell that contains it.
	float s = dot(x, F4);
	float4 ss = x + (float4)s;
	int4 i = floor(ss);

	// Unskew the cell origin; x0 is the offset from that origin to x.
	float t = dot(i, G4);
	float4 X0 = i - (float4)t;
	float4 x0 = x - X0;

	// Pack the six pairwise comparisons of x0's components into a 6-bit index
	// into the `simplex` table.
	int c1 = (x0.x > x0.y) ? 32 : 0;
	int c2 = (x0.x > x0.z) ? 16 : 0;
	int c3 = (x0.y > x0.z) ? 8 : 0;
	int c4 = (x0.x > x0.w) ? 4 : 0;
	int c5 = (x0.y > x0.w) ? 2 : 0;
	int c6 = (x0.z > x0.w) ? 1 : 0;
	int c = c1 | c2 | c3 | c4 | c5 | c6;

	// Lattice steps to the second, third and fourth corners: a component is
	// stepped for i1 when its table value is 3, for i2 when it is >= 2, and
	// for i3 when it is >= 1.
	int4 i1;
	int4 i2;
	int4 i3;

	i1.x = simplex[c][0] >= 3 ? 1 : 0;
	i1.y = simplex[c][1] >= 3 ? 1 : 0;
	i1.z = simplex[c][2] >= 3 ? 1 : 0;
	i1.w = simplex[c][3] >= 3 ? 1 : 0;
	i2.x = simplex[c][0] >= 2 ? 1 : 0;
	i2.y = simplex[c][1] >= 2 ? 1 : 0;
	i2.z = simplex[c][2] >= 2 ? 1 : 0;
	i2.w = simplex[c][3] >= 2 ? 1 : 0;
	i3.x = simplex[c][0] >= 1 ? 1 : 0;
	i3.y = simplex[c][1] >= 1 ? 1 : 0;
	i3.z = simplex[c][2] >= 1 ? 1 : 0;
	i3.w = simplex[c][3] >= 1 ? 1 : 0;

	// Offsets from the second, third, fourth and last corner.
	float4 x1 = x0 - i1 + G4;
	float4 x2 = x0 - i2 + 2.0f * G4;
	float4 x3 = x0 - i3 + 3.0f * G4;
	float4 x4 = x0 - (float4)1.0f + 4.0f * G4;

	float t0 = 0.5f - dot(x0, x0);
	if (t0 >= 0.0f) {
		float t20 = t0 * t0;
		float4 g0 = sgrad4(i, seed);
		n.x = t20 * t20 * dot(g0, x0);
	}

	float t1 = 0.5f - dot(x1, x1);
	if (t1 >= 0.0f) {
		float t21 = t1 * t1;
		float4 g1 = sgrad4(i + i1, seed);
		n.y = t21 * t21 * dot(g1, x1);
	}

	float t2 = 0.5f - dot(x2, x2);
	if (t2 >= 0.0f) {
		float t22 = t2 * t2;
		float4 g2 = sgrad4(i + i2, seed);
		n.z = t22 * t22 * dot(g2, x2);
	}

	float t3 = 0.5f - dot(x3, x3);
	if (t3 >= 0.0f) {
		float t23 = t3 * t3;
		float4 g3 = sgrad4(i + i3, seed);
		n.w = t23 * t23 * dot(g3, x3);
	}

	float t4 = 0.5f - dot(x4, x4);
	if (t4 >= 0.0f) {
		float t24 = t4 * t4;
		// Integer add, as in the 2D version. Adding a float4 here would send
		// the lattice coordinate through float and back, which is inexact once
		// |i| exceeds 2^24.
		float4 g4 = sgrad4(i + (int4)1, seed);
		n1 = t24 * t24 * dot(g4, x4);
	}

	const float scale = 54.0f;
	return scale * (dot(n, (float4)1.f) + n1);
}

// 1D simplex noise with float3 output: the three channels are scalar simplex
// noise evaluated with seeds 0, 1 and 2. The second parameter is ignored.
float3 simplexnoise_3_1(float x, int unused)
{
	float3 channels;
	channels.x = simplexnoise_1_1(x, 0);
	channels.y = simplexnoise_1_1(x, 1);
	channels.z = simplexnoise_1_1(x, 2);
	return channels;
}

// 2D simplex noise with float3 output: the three channels are scalar simplex
// noise evaluated with seeds 0, 1 and 2. The second parameter is ignored.
float3 simplexnoise_3_2(float2 x, int unused)
{
	float3 channels;
	channels.x = simplexnoise_1_2(x, 0);
	channels.y = simplexnoise_1_2(x, 1);
	channels.z = simplexnoise_1_2(x, 2);
	return channels;
}

// 3D simplex noise with float3 output: the three channels are scalar simplex
// noise evaluated with seeds 0, 1 and 2. The second parameter is ignored.
float3 simplexnoise_3_3(float3 x, int unused)
{
	float3 channels;
	channels.x = simplexnoise_1_3(x, 0);
	channels.y = simplexnoise_1_3(x, 1);
	channels.z = simplexnoise_1_3(x, 2);
	return channels;
}

// 4D simplex noise with float3 output: the three channels are scalar simplex
// noise evaluated with seeds 0, 1 and 2. The second parameter is ignored.
float3 simplexnoise_3_4(float4 x, int unused)
{
	float3 channels;
	channels.x = simplexnoise_1_4(x, 0);
	channels.y = simplexnoise_1_4(x, 1);
	channels.z = simplexnoise_1_4(x, 2);
	return channels;
}